<?php 
// PicoPygments - Pico CMS plugin that syntax-highlights code blocks with Python's Pygments.
//
// Flow per request:
//   onConfigLoaded  - reads the "PicoPygments" section of the Pico config
//   onMetaParsed    - decides whether the current page should be processed
//   onContentParsed - finds <code class="language-*"> elements in the rendered HTML, sends their
//                     text through Pygments (one Python process for all uncached blocks), replaces
//                     the elements with the highlighted markup and prepends a stylesheet link.
class PicoPygments extends AbstractPicoPlugin
{
  const API_VERSION=3;

  // only act if this var is set to true (decided in onMetaParsed)
  private $yeah=FALSE;

  // debug output on/off - follows the top-level "debug" config key
  private $debug=FALSE;
  // additional (super verbose) debug output - config key PicoPygments.debug_xtra
  private $debug_xtra=FALSE;
  // twig template to activate this plugin for; the special value "all" matches every template
  private $template='post';
  // the code block's class will become this, plus "language-$lang"
  private $cssclass='pcpg';
  // Python command; "-c <script>" is appended to it
  private $python_cmd='python3 -I';
  // display a label with the language in a corner?
  private $label=FALSE;
  // formatteroptions string, pasted verbatim into the generated Python code (trusted config only!)
  private $formatteroptions='nowrap=True';
  // lexeroptions string, pasted verbatim into the generated Python code (trusted config only!)
  private $lexeroptions='encoding="utf-8"';
  // caching is on by default; NULL means "no caching"
  private $cache_dir="/dev/shm/cache/pico-production/pygments";
  // parsedown prepends this to the language for code block classes, e.g.: <code class="language-php">... not (currently) configurable
  private $langprefix="language-";
  // stylesheet URL path; empty string means "no stylesheet link is emitted"
  private $stylesheet='';
  // CSS prefix to ignore when searching for colors
  private $cssprefix='custom-';
  // stylesheet fallback
  private $stylesheet_fallback='_var_with_fallback';

  // Echoes a debug line (and optionally a second, very verbose one) when debugging is enabled.
  // The name of the calling method is included in the output.
  //   $str  - normal debug message; skipped when empty
  //   $xtra - verbose message, only shown when debug_xtra is enabled
  private function debugMsg($str,$xtra="") {
    if($this->debug !== TRUE) return;
    $trace=debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS,2);
    $caller=isset($trace[1]['function']) ? $trace[1]['function'] : '?';
    $prefix='⇛ '."PicoPygments (".$caller."): ";
    if($str !== "") echo $prefix.$str."<br/>\n";
    if($xtra !== "" && $this->debug_xtra === TRUE) echo $prefix.$xtra."<br/>\n";
  }

  // Debugging helper: echoes a (possibly nested) array as indented "key => value" lines.
  private function array_info($array,$sep='') {
    foreach($array as $k => $v) {
      if(is_array($v)) {
        echo $sep.$k." =>\n";
        $this->array_info($v,$sep."\t");
      }
      else echo $sep.$k." => ".var_export($v,TRUE)."\n";
    }
  }

  // Pico event: reads the plugin's settings from $config['PicoPygments'] and the global
  // $config['debug'] flag. Missing or empty settings fall back to the defaults.
  public function onConfigLoaded(array &$config)
  {
    $cfg=(isset($config['PicoPygments']) && is_array($config['PicoPygments'])) ? $config['PicoPygments'] : array();

    // debugging output yes or no
    if(isset($config['debug']) && $config['debug'] === TRUE) {
      $this->debug=TRUE;
      ini_set('display_errors', 'On');
      error_reporting(E_ALL);
      $this->debugMsg("Debug output enabled.");
    }
    else $this->debug=FALSE;
    $this->debug_xtra=(isset($cfg['debug_xtra']) && $cfg['debug_xtra'] === TRUE);

    //////////////// User-configurable variables
    // twig template to activate this plugin for - default: post
    $this->template=(isset($cfg['template']) && $cfg['template'] != "") ? $cfg['template'] : "post";
    // the code block's class will become this, plus "language-$lang"
    $this->cssclass=isset($cfg['cssclass']) ? $cfg['cssclass'] : 'pcpg';
    // Python command - default: "python3 -I" - yes, PHP apparently does PATH lookups.
    $this->python_cmd=(isset($cfg['python_cmd']) && $cfg['python_cmd'] != "") ? $cfg['python_cmd'] : "python3 -I";

    // caching already parsed code blocks - FALSE or '' switches caching off
    if(isset($cfg['cache_dir'])) {
      if($cfg['cache_dir'] === FALSE || $cfg['cache_dir'] === '') $this->cache_dir=NULL;
      else $this->cache_dir=$cfg['cache_dir'];
    }
    // make the directory if required; fall back to "no caching" if it cannot be used
    if($this->cache_dir !== NULL) {
      if(!is_dir($this->cache_dir)) mkdir($this->cache_dir,0700,TRUE);
      if(is_dir($this->cache_dir) && is_writable($this->cache_dir)) $this->debugMsg("Picopyg cache dir is now ".$this->cache_dir);
      else {
        $this->debugMsg("Failed to access directory ".$this->cache_dir .", continuing without caching.");
        $this->cache_dir=NULL;
      }
    }

    // display a label with the language in a corner?
    if(isset($cfg['label']) && $cfg['label'] === TRUE) $this->label=TRUE;
    // setting up formatteroptions string to pass to python.
    if(isset($cfg['formatteroptions']) && $cfg['formatteroptions'] !== FALSE) $this->formatteroptions=$cfg['formatteroptions'];
    // setting up lexeroptions string to pass to python.
    if(isset($cfg['lexeroptions']) && $cfg['lexeroptions'] !== FALSE) $this->lexeroptions=$cfg['lexeroptions'];

    // If a stylesheet isn't explicitly pointed to (absolute URL path starting with "/"),
    // use a fallback from the plugin's css/ directory (with magic color variables!)
    if(isset($cfg['stylesheet']) && is_string($cfg['stylesheet']) && substr($cfg['stylesheet'],0,1) === '/') {
      $this->stylesheet=$cfg['stylesheet'];
    }
    else {
      $style=$this->stylesheet_fallback;
      $candidates=array(
        __DIR__.'/css/'.$this->cssprefix.$style.'.css',
        __DIR__.'/css/'.$style.'.css',
      );
      // turn the filesystem path into a URL path by stripping the document root prefix
      $docroot=isset($_SERVER['DOCUMENT_ROOT']) ? rtrim($_SERVER['DOCUMENT_ROOT'],'/') : '';
      foreach($candidates as $file) {
        if(!file_exists($file)) continue;
        if($docroot !== '' && strpos($file,$docroot.'/') === 0) $file=substr($file,strlen($docroot));
        $this->stylesheet=$file;
        break;
      }
    }
  }

  // Pico event: enables processing when the page's template matches the configured one
  // (or the configured template is "all"), unless the page sets "picopygments: false".
  public function onMetaParsed(array  $meta)
  {
    // start from "off" so the decision always reflects the meta data passed in
    $this->yeah=FALSE;
    // Only Do Things if pycopyg hasn't been disabled
    if(isset($meta['picopygments']) && $meta['picopygments'] === FALSE ) {
      $this->debugMsg("function ". __FUNCTION__ .' PicoPygments disabled for this page');
      return;
    }
    $template=isset($meta['template']) ? $meta['template'] : '';
    $this->debugMsg("function ". __FUNCTION__ .' found template "'. $template .'"');
    // Only Do Things if we have the correct template
    if($template === $this->template || $this->template === "all" ) $this->yeah=TRUE;
  }

  // Pico event: replaces every <code class="language-*"> element in $content with its
  // Pygments-highlighted version and prepends the stylesheet link. $content is modified in place.
  // Blocks that cannot be highlighted (unknown/invalid language, Pygments failure, output that is
  // not well-formed XML) are left exactly as they were.
  public function onContentParsed(&$content)
  {
    if($this->yeah !== TRUE) {
      $this->debugMsg("We're not doing anything. Bye!");
      return;
    }
    if(trim((string)$content) === "") {
      $this->debugMsg("Content is empty. Bye!");
      return;
    }

    // $content is a html fragment. We like to modify its content with DOMDocument, but it prefers complete documents and completes them
    // if necessary. This leads to strangeness. The best thing I found is to give it a complete document with all required headers and tags,
    // then strip those off in the end. That way DOMDocument doesn't meddle, and we have control. Hah, hopefully.
    $wrap_head='<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body>';
    $wrap_tail='</body></html>';
    $dom=new DOMDocument();
    $dom->preserveWhiteSpace = TRUE;
    // collect libxml complaints (HTML5 tags etc.) internally instead of emitting PHP warnings
    $libxml_prev=libxml_use_internal_errors(TRUE);
    $loaded=$dom->loadHTML($wrap_head.$content.$wrap_tail,LIBXML_NONET|LIBXML_COMPACT|LIBXML_HTML_NODEFDTD|LIBXML_NOWARNING);
    libxml_clear_errors();
    libxml_use_internal_errors($libxml_prev);
    if($loaded === FALSE) {
      $this->debugMsg("Could not parse the page content as HTML, leaving it untouched.");
      return;
    }

    // collecting code nodes that have the string fragment $this->langprefix in their classes
    $xpath=new DOMXPath($dom);
    $found=$xpath->query('//code[contains(concat(" ", @class, " "),"'. $this->langprefix .'")]'); //stackoverflow.com/a/1390680
    if($found === FALSE || $found->length === 0) {
      $this->debugMsg('no code blocks with "'.$this->langprefix .'*" classes? Nothing to do, we\'re out!');
      return;
    }
    $nodes=array();
    foreach($found as $node) $nodes[]=$node;

    // static base for cached file's names - code block specific language & checksum are added later.
    // Everything that is not a harmless file name character becomes "-".
    $filename_base=preg_replace('/[^A-Za-z0-9_.\-]/','-',$this->formatteroptions .'-'. $this->lexeroptions);
    // assuming that pygments will always escape a literal < or > as "&lt;" or "&gt;",
    // a string like this should never show up in its output, and be a safe separator:
    $sep='<<<>>><<<>>><<<>>>';

    $script='';          // python code for all blocks that are not cached yet
    $pending=array();    // indices into $nodes, in the order their code was appended to $script
    $html=array();       // highlighted markup per node; '' = leave the original block alone
    $langs=array();      // sanitized language per node
    $cachefile=array();  // cache file path per node, or NULL

    foreach($nodes as $i => $node) {
      $langclass=$node->getAttribute('class');
      $code=$node->nodeValue;
      $lang=$this->languageFromClass($langclass);
      $html[$i]='';
      $langs[$i]=$lang;
      $cachefile[$i]=NULL;
      $this->debugMsg("Language class: $langclass - Language: $lang");
      $this->debugMsg("processing code node $i","nodeValue: $code");
      if($lang === '') {
        $this->debugMsg("Not a usable language name, leaving code node $i untouched.");
        continue;
      }
      if($this->cache_dir !== NULL) {
        // crc32b: a non-cryptographic, short'n'fast hash of the block's text
        $cachefile[$i]=$this->cache_dir .'/'. hash('crc32b',$code).'-'.$filename_base.'-'.$lang;
        if(is_file($cachefile[$i])) {
          $cached=file_get_contents($cachefile[$i]);
          if($cached !== FALSE && trim($cached) !== '') {
            $html[$i]=$cached;
            $this->debugMsg("Got cached contents from ".$cachefile[$i],"code node after: ".$cached);
            continue;
          }
        }
      }
      // not cached (yet): append this one to what will become the final python script
      $script.=$this->pygmentsSnippet($code,$lang,$sep);
      $pending[]=$i;
    }

    // the whole python-pygments block is conditional to this:
    if($script !== '') {
      $results=$this->runPygments($script,$sep);
      $this->debugMsg("We have ".count($results)." pygment output results for ".count($pending)." code blocks");
      // result number $j belongs to the $j-th block that was sent to python
      foreach($pending as $j => $i) {
        if(!isset($results[$j])) break; // python died early; the remaining blocks stay untouched
        $out=trim($results[$j]);
        if($out === '') continue;       // pygments failed for this block (e.g. unknown lexer)
        $html[$i]=$out;
        $this->debugMsg("Assigned pygments output $j to code block $i","Content: $out");
        if($cachefile[$i] !== NULL) {
          $this->debugMsg("Writing HTML to cache: ".$cachefile[$i]);
          if(file_put_contents($cachefile[$i],$out) === FALSE) $this->debugMsg("Could not write ".$cachefile[$i]);
        }
      }
    }

    // replace the original code nodes with the syntax-highlighted ones
    foreach($nodes as $i => $node) {
      if(trim($html[$i]) === '') continue; // if it's empty, don't replace anything, use the original code block
      if($this->replaceCodeNode($dom,$node,$html[$i],$langs[$i])) $this->debugMsg("Replaced original code block $i.","Content: ".$html[$i]);
      else $this->debugMsg("Highlighted markup for code block $i could not be inserted, keeping the original.");
    }

    // serialize and remove the wrapper we added at loadHTML: keep what is between the first
    // <body> and the last </body> (the wrapper's <head> holds nothing that could contain "<body>")
    $serialized=$dom->saveHTML();
    $start=($serialized === FALSE) ? FALSE : strpos($serialized,'<body>');
    $end=($serialized === FALSE) ? FALSE : strrpos($serialized,'</body>');
    if($start === FALSE || $end === FALSE || $end < $start) {
      $this->debugMsg("Could not serialize the modified document, leaving the content untouched.");
      return;
    }
    $start+=strlen('<body>');
    $content=substr($serialized,$start,$end-$start);

    // Finally, adding stylesheet (only if we actually have one):
    if($this->stylesheet !== '') {
      $content='<link rel="stylesheet" href="'. htmlspecialchars($this->stylesheet,ENT_QUOTES,'UTF-8') .'" type="text/css" />'.$content;
    }
  }

  // Extracts the language name from a code element's class attribute (e.g. "language-php" => "php").
  // parsedown-extra doesn't sanitize the name (see README.md), so wrapping "{.#" / "}" is stripped.
  // Returns '' if what remains is not a plain name: the name ends up inside generated Python
  // source and inside a cache file name, so quotes, slashes, whitespace etc. must never get through.
  private function languageFromClass($langclass) {
    $lang=(string)substr($langclass,strlen($this->langprefix)); // remove classprefix, usually "language-"
    $lang=rtrim(ltrim($lang,'{.#'),'}');
    if(preg_match('/^[A-Za-z0-9][A-Za-z0-9_+#.\-]*$/D',$lang) !== 1) return '';
    return $lang;
  }

  // Builds the python statements that highlight one code block and print the result followed by
  // $sep. A failure inside python (unknown lexer, ...) prints only the separator, so results
  // stay aligned with the blocks that were sent.
  //   $code - raw text of the code block
  //   $lang - language name as returned by languageFromClass()
  private function pygmentsSnippet($code,$lang,$sep) {
    // addslashes() escapes \ ' " and NUL, which is sufficient inside a python """...""" literal
    $code=addslashes($code);
    return implode("\n",array(
      'code="""'.$code.'"""',
      'try:',
      '    lexer=get_lexer_by_name("'.$lang.'", '.$this->lexeroptions.' )',
      "    lexer.add_filter('tokenmerge')",
      '    print(highlight(code, lexer, HtmlFormatter( '.$this->formatteroptions.' )),end = "");',
      'except Exception:',
      '    pass',
      "print('".$sep."',end = \"\");",
      '',
    ));
  }

  // Runs the collected python statements in one interpreter process and returns the list of
  // output chunks, one per snippet, in the order the snippets were added.
  private function runPygments($script,$sep) {
    // add imports to beginning
    $script="from pygments import highlight;from pygments.lexers import get_lexer_by_name;from pygments.formatters import HtmlFormatter;\n".$script;
    $arg=escapeshellarg($script);
    $this->debugMsg('Sending script to '.$this->python_cmd .' - formatteroptions: '.$this->formatteroptions .', lexeroptions: '.$this->lexeroptions,'script: " -c": '.$arg);
    $lines=array();
    $status=0;
    exec($this->python_cmd .' -c '.$arg,$lines,$status);
    if($status !== 0) $this->debugMsg($this->python_cmd ." exited with status $status");
    $raw=implode("\n",$lines);
    $this->debugMsg("","Raw result: $raw");
    $chunks=explode($sep,$raw);
    array_pop($chunks); // every snippet ends with $sep, so the last element is never a result
    return $chunks;
  }

  // Replaces one original <code> element with its highlighted version.
  // With labels enabled and a <pre> parent, the whole <pre> is replaced by
  // <pre class="cssclass"><div class="corner">lang</div><code class="cssclass language-x">...</code></pre>;
  // otherwise only the <code> element is replaced (and an enclosing <pre> gets the cssclass).
  // Returns FALSE - without touching the document - if $markup is not well-formed XML.
  private function replaceCodeNode(DOMDocument $dom,DOMElement $node,$markup,$lang) {
    $parent=$node->parentNode;
    if($parent === NULL) return FALSE; // already detached, e.g. nested inside a replaced block
    $classes=$this->cssclass .' '.$node->getAttribute('class'); // let's pull them together for easier application
    $in_pre=($parent instanceof DOMElement && $parent->tagName === 'pre');
    $with_label=($in_pre && $this->label === TRUE && $parent->parentNode !== NULL);

    if($with_label) {
      $esc=ENT_QUOTES|ENT_SUBSTITUTE|ENT_XML1;
      $markup='<div class="corner">'.htmlspecialchars($lang,$esc,'UTF-8').'</div>'
             .'<code class="'.htmlspecialchars($classes,$esc,'UTF-8').'">'.$markup.'</code>';
    }

    // parse the markup first; only touch the document once we know it is usable
    $fragment=$dom->createDocumentFragment();
    $libxml_prev=libxml_use_internal_errors(TRUE);
    $parsed=$fragment->appendXML($markup);
    libxml_clear_errors();
    libxml_use_internal_errors($libxml_prev);
    if($parsed !== TRUE || !$fragment->hasChildNodes()) return FALSE;

    if($with_label) {
      $pre=$dom->createElement('pre');
      $pre->setAttribute("class",$this->cssclass);
      $pre->appendChild($fragment);
      $parent->parentNode->replaceChild($pre,$parent);
    }
    else {
      // if the code block is enclosed by a pre, set the cssclass on it
      if($in_pre) $parent->setAttribute("class",$this->cssclass);
      $code=$dom->createElement('code');
      $code->setAttribute("class",$classes);
      $code->appendChild($fragment);
      $parent->replaceChild($code,$node);
    }
    return TRUE;
  }
}
